###############################################################################################
## Multiple Imputation Replication R Code for
## No Reservations: International Order and Demand for the Renminbi as a Reserve Currency 
## Authors: Steven Liao, Daniel E. McDowell
###############################################################################################

###############################################################################################
## CAUTION: the following code is computationally intensive to run on a personal computer
## We implement parallel MI by submitting 10 jobs to 10 different nodes of a cluster
## For each job we only request 1 multiply imputed dataset using the code below
## Each job is also implemented in parallel within nodes by the number of CPU (cores)
## We combine the 10 multiply imputed datasets once all 10 jobs are complete
## The following code simply demonstrates how to replicate our multiple imputation dataset if one
## could run the code on a single supercomputer
## For further details on Multiple imputation on University of Virginia's ITS Linux cluster with Amelia
## Please see <https://github.com/stevenliaotw/parallel> or contact Steven Liao <stevenliao@princeton.edu>
################################################################################################

# clear all objects in workspace
rm(list = ls())
date()

# load library
library(Amelia)
#library(parallel)
#library(multicore)
library(foreign)

#Get command line argument
cmdArgs = commandArgs(trailingOnly=TRUE)
inputNum = 1
runNum = as.numeric(cmdArgs[inputNum])
print(paste("Run Number:",as.character(runNum)))

numCores = 16

# set seed
set.seed(1234*runNum)

# set directory
MAIN_DIR <- "/Users/stevenliao/Dropbox/Research/No Reservations/replication"
# MAIN_DIR <- "replace above with your own directory here"

# load data
load(paste(MAIN_DIR, "data/merge.RData", sep = "/"))

# set bounds
bds <- matrix(c(52, -1, 1, # s2un_us
                53, -1, 1, # s3un_us 
                54, 0, 1, # agree2un_us
                55, 0, 1, # agree3un_us
                56, -1, 1, # s2un_cn
                57, -1, 1, # s3un_cn
                58, 0, 1, #agree2un_cn
                59, 0, 1, # agree3un_cn
                83, 0, 1), # ka_open
              byrow = TRUE, 
              nrow = 9, ncol = 3)

# run amelia
res.mi <- amelia(x = data, 
                 m = 1, 
                 idvars = c("iso2c", "scode", "country", 
                            "execrlc",
                            "rmbres", "rmbres_lead", "rmbres_swf", "rmbres_swf_lead"),
                 ords = c("polity2", "irto_cumsum_cbi", "execrlc_ord"),
                 sqrt = c("res_gdp_cur", "res_imp", "res_debt",
                          "chfdi_inflow",
                          "pcnexpdep", "pcnimpdep", "pusexpdep", "pusimpdep", 
                          "pcnexpdepgdp", "pcnimpdepgdp", "pusexpdepgdp", "pusimpdepgdp", "pcnusexpdep",
                          "gdp_2005_wdi_share",
                          "imf_vote_gdp_2005_rate",
                          "cn_aid_dep_aiddata", 
                          "us_mil_aid_dep", "cn_invest_dep",
                          "ka_open"),
                 logs = c("gdp_2005_wdi", "gdp_pc_2005_wdi", "gdp_cur_wdi",
                          "rgdpepc",
                          "rgdpe",
                          "pwexp", "pwimp", "pcnexp", "pcnimp", "pusexp", "pusimp",
                          "us_mil_aid",
                          "invest_sum_mil"),
                 lags = c("gdp_2005_wdi", "gdp_pc_2005_wdi", "gdp_growth_wdi", "rgdpepc", "gdp_cur_wdi",
                          "res_imp", "res_debt", "res_gdp_cur",
                          "ca_bal_gdp_wdi", "ca_bal_cur_wdi", 
                          "ppgd_other", 
                          "inflate",
                          "polity2", "uds_mean",
                          "rgdpe",
                          "chfdi_inflow", "chfdi_outflow", "pwfdi_iflo_un", 
                          "gdp_2005_wdi_share", 
                          "imf_vote_gdp_2005_dif",
                          "imf_vote_gdp_2005_rate",
                          "us_econ_aid", "us_mil_aid", "us_tot_aid", 
                          "us_econ_aid_dep", "us_mil_aid_dep", "us_tot_aid_dep",
                          "cn_aid_dep_aiddata",
                          "invest_sum_mil", "cn_invest_dep",
                          "s2un_us", "s3un_us", "agree2un_us", "agree3un_us",  
                          "s2un_cn", "s3un_cn", "agree2un_cn", "agree3un_cn", 
                          "ipd_us", "ipd_cn",
                          "pwexp", "pwimp", "pcnexp", "pcnimp", 
                          "pcnexpdep", "pcnimpdep", "pusexpdep", "pusimpdep", 
                          "pcnexpdepgdp", "pcnimpdepgdp", "pusexpdepgdp", "pusimpdepgdp", "pcnusexpdep", "pwexp_0709_dif",
                          "pfdidep", "pfdidepgdp",
                          "irto_cumsum_cbi", 
                          "execrlc_ord",
                          "ka_open"),
                 leads = c("gdp_2005_wdi", "gdp_pc_2005_wdi", "gdp_growth_wdi", "rgdpepc", "gdp_cur_wdi",
                           "res_imp", "res_debt", "res_gdp_cur",
                           "ca_bal_gdp_wdi", "ca_bal_cur_wdi", 
                           "ppgd_other", 
                           "inflate",
                           "polity2", "uds_mean",
                           "rgdpe",
                           "chfdi_inflow", "chfdi_outflow", "pwfdi_iflo_un", 
                           "gdp_2005_wdi_share",
                           "imf_vote_gdp_2005_dif",
                           "imf_vote_gdp_2005_rate", 
                           "us_econ_aid", "us_mil_aid", "us_tot_aid", 
                           "us_econ_aid_dep", "us_mil_aid_dep", "us_tot_aid_dep",
                           "cn_aid_dep_aiddata",
                           "invest_sum_mil", "cn_invest_dep",
                           "s2un_us", "s3un_us", "agree2un_us", "agree3un_us",  
                           "s2un_cn", "s3un_cn", "agree2un_cn", "agree3un_cn", 
                           "ipd_us", "ipd_cn",
                           "pwexp", "pwimp", "pcnexp", "pcnimp", 
                           "pcnexpdep", "pcnimpdep", "pusexpdep", "pusimpdep", 
                           "pcnexpdepgdp", "pcnimpdepgdp", "pusexpdepgdp", "pusimpdepgdp", "pcnusexpdep", "pwexp_0709_dif",
                           "pfdidep", "pfdidepgdp",
                           "irto_cumsum_cbi", 
                           "execrlc_ord",
                           "ka_open"),
                 cs = "iso3c",        
                 ts = "year",
                 bounds = bds,
                 empri = .05*nrow(data),
                 parallel = "multicore",
                 ncpus = numCores)


#####################
## Diagnostics
#####################
data.mi <- res.mi
summary(data.mi)
#missmap(data.mi)
# density plot comparison
pdf(file = "density.pdf", width = 12, height = 12)
par(mfrow = c(26, 3))
plot(data.mi, which.vars = c(10:80, 82:87))
dev.off()

# overimputation
pdf(file = "overimpute.pdf", width = 12, height = 60)
par(mfrow=c(24, 3))
overimpute(data.mi, var = "gdp_2005_wdi") 
overimpute(data.mi, var = "gdp_cur_wdi") 
overimpute(data.mi, var = "gdp_pc_2005_wdi") 
overimpute(data.mi, var = "gdp_growth_wdi") 
overimpute(data.mi, var = "rgdpepc") 
overimpute(data.mi, var = "res_gdp_cur") 
overimpute(data.mi, var = "res_imp") 
overimpute(data.mi, var = "res_debt") 
overimpute(data.mi, var = "ca_bal_gdp_wdi") 
overimpute(data.mi, var = "ca_bal_cur_wdi") 
overimpute(data.mi, var = "ppgd_other") 
overimpute(data.mi, var = "inflate") 
overimpute(data.mi, var = "polity2") 
overimpute(data.mi, var = "uds_mean") 
overimpute(data.mi, var = "rgdpe") 
overimpute(data.mi, var = "chfdi_inflow")
overimpute(data.mi, var = "chfdi_outflow")
overimpute(data.mi, var = "pwfdi_iflo_un")
overimpute(data.mi, var = "gdp_2005_wdi_share") 
overimpute(data.mi, var = "imf_vote_gdp_2005_dif") 
overimpute(data.mi, var = "imf_vote_gdp_2005_rate") 
overimpute(data.mi, var = "us_econ_aid") 
overimpute(data.mi, var = "us_mil_aid") 
overimpute(data.mi, var = "us_tot_aid") 
overimpute(data.mi, var = "us_econ_aid_dep") 
overimpute(data.mi, var = "us_mil_aid_dep") 
overimpute(data.mi, var = "us_tot_aid_dep") 
overimpute(data.mi, var = "cn_aid_dep_aiddata") 
overimpute(data.mi, var = "invest_sum_mil") 
overimpute(data.mi, var = "cn_invest_dep") 
overimpute(data.mi, var = "s2un_us")
overimpute(data.mi, var = "s3un_us")
overimpute(data.mi, var = "agree2un_us")
overimpute(data.mi, var = "agree3un_us")
overimpute(data.mi, var = "s2un_cn")
overimpute(data.mi, var = "s3un_cn")
overimpute(data.mi, var = "agree2un_cn")
overimpute(data.mi, var = "agree3un_cn")
overimpute(data.mi, var = "ipd_us")
overimpute(data.mi, var = "ipd_cn")
overimpute(data.mi, var = "pwexp")
overimpute(data.mi, var = "pwimp")
overimpute(data.mi, var = "pcnexp")
overimpute(data.mi, var = "pcnimp")
overimpute(data.mi, var = "pusexp")
overimpute(data.mi, var = "pusimp")
overimpute(data.mi, var = "pcnexpdep")
overimpute(data.mi, var = "pcnimpdep")
overimpute(data.mi, var = "pusexpdep")
overimpute(data.mi, var = "pusimpdep")
overimpute(data.mi, var = "pcnexpdepgdp")
overimpute(data.mi, var = "pcnimpdepgdp")
overimpute(data.mi, var = "pusexpdepgdp")
overimpute(data.mi, var = "pusimpdepgdp")
overimpute(data.mi, var = "pcnusexpdep")
overimpute(data.mi, var = "pwexp_0709_dif")
overimpute(data.mi, var = "pfdidep")
overimpute(data.mi, var = "pfdidepgdp")
overimpute(data.mi, var = "irto_cumsum_cbi")
overimpute(data.mi, var = "execrlc_ord")
overimpute(data.mi, var = "ka_open")
dev.off()


####################################
## post imputation transformation
####################################
# recreate variables
data.mi <- transform(data.mi, pcnexpdep = pcnexp/pwexp) 
data.mi <- transform(data.mi, pcnimpdep = pcnimp/pwimp) 
data.mi <- transform(data.mi, pcnexpdepgdp = pcnexp/gdp_cur_wdi)
data.mi <- transform(data.mi, pcnimpdepgdp = pcnimp/gdp_cur_wdi)
data.mi <- transform(data.mi, pusexpdep = pusexp/pwexp) 
data.mi <- transform(data.mi, pusimpdep = pusimp/pwimp) 
data.mi <- transform(data.mi, pusexpdepgdp = pusexp/gdp_cur_wdi)
data.mi <- transform(data.mi, pusimpdepgdp = pusimp/gdp_cur_wdi)
data.mi <- transform(data.mi, pcnusexpdep = pcnexpdep/pusexpdep) 
data.mi <- transform(data.mi, pfdidep = chfdi_outflow/pwfdi_iflo_un)
data.mi <- transform(data.mi, pfdidepgdp = chfdi_outflow/gdp_cur_wdi*1000000)
data.mi <- transform(data.mi, us_econ_aid_dep = us_econ_aid/gdp_cur_wdi)
data.mi <- transform(data.mi, us_mil_aid_dep = us_mil_aid/gdp_cur_wdi)
data.mi <- transform(data.mi, us_tot_aid_dep = us_tot_aid/gdp_cur_wdi)
data.mi <- transform(data.mi, cn_invest_dep = invest_sum_mil*1000000/gdp_cur_wdi)
data.mi <- transform(data.mi, imf_vote_gdp_2005_dif = imf_vote - gdp_2005_wdi_share)
data.mi <- transform(data.mi, imf_vote_gdp_2005_rate = imf_vote/gdp_2005_wdi_share)
data.mi <- transform(data.mi, execrlc_bi = ifelse(execrlc_ord == 3, 1, 0))
data.mi <- transform(data.mi, dist_k = dist/1000)

summary(data.mi)

# save data
save(data.mi, file = "reserve_mi.RData")
write.amelia(data.mi, file.stem = "reserve_mi", extension = ".dta", format = "dta")

#################################################
## collapse country-year dataset to just country
#################################################
data.mi.cs <- llply(data.mi$imputations, function(x) {
  out <- x %>%
    group_by(iso3c) %>%
    summarise(country = head(country, 1),
              pwexp_0709_dif = head(pwexp_0709_dif, 1),
              rmbres = ifelse(any(rmbres == 1), 1, 0),
              rmbres_lead = ifelse(any(rmbres_lead == 1), 1, 0),
              rmbres_swf = ifelse(any(rmbres_swf == 1), 1, 0),
              rmbres_swf_lead = ifelse(any(rmbres_swf_lead == 1), 1, 0),
              swap = ifelse(any(swap == 1), 1, 0),
              scofull = ifelse(any(scofull == 1), 1, 0),
              execrlc_bi = ifelse(any(execrlc_bi == 1), 1, 0),
              g24 = ifelse(any(g24 == 1), 1, 0),
              g24_brics = ifelse(any(g24_brics == 1), 1, 0),
              sco_g24_brics = ifelse(any(sco_g24_brics == 1), 1, 0),
              twn_rec = ifelse(any(twn_rec == 1), 1, 0),
              gdp_2005_wdi = mean(gdp_2005_wdi),
              gdp_pc_2005_wdi = mean(gdp_pc_2005_wdi),
              ppgd_other = mean(ppgd_other),
              pcnimpdep = mean(pcnimpdep),
              pcnimpdepgdp = mean(pcnimpdepgdp),
              pcnexpdep = mean(pcnexpdep),
              pcnexpdepgdp = mean(pcnexpdepgdp),
              pusexpdep = mean(pusexpdep),
              pusexpdepgdp = mean(pusexpdepgdp),
              pcnusexpdep = mean(pcnusexpdep),
              ka_open = mean(ka_open),
              irto_cumsum_cbi = mean(irto_cumsum_cbi),
              pfdidep = mean(pfdidep),
              cn_aid_dep_aiddata = mean(cn_aid_dep_aiddata),
              cn_visits = mean(cn_visits),
              imf_vote_gdp_2005_dif = mean(imf_vote_gdp_2005_dif),
              imf_vote_gdp_2005_rate = mean(imf_vote_gdp_2005_rate),
              us_econ_aid_dep = mean(us_econ_aid_dep),
              us_mil_aid_dep = mean(us_mil_aid_dep),
              us_tot_aid_dep = mean(us_tot_aid_dep),
              troops = mean(troops),
              troops_pc = mean(troops_pc),
              res_imp = mean(res_imp),
              res_debt = mean(res_debt),
              res_gdp_cur = mean(res_gdp_cur),
              s3un_cn = mean(s3un_cn),
              s3un_us = mean(s3un_us),
              s2un_cn = mean(s2un_cn),
              s2un_us = mean(s2un_us),
              ipd_cn = mean(ipd_cn),
              ipd_us = mean(ipd_us),
              dist = mean(dist),
              dist_k = mean(dist_k),
              polity2 = median(polity2))
  out <- as.data.frame(out)
}
)

save(data.mi.cs, file = "reserve_mi_cs.RData")
